/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package buzzerproxy; /** * * @author Enger * @see: http://www.rgagnon.com/javadetails/java-0639.html */ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.http.Header; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.BasicResponseHandler; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager; import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.HttpContext; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** @see http://hc.apache.org/httpcomponents-client-ga/examples.html */ public class Http { private ArrayList validResultEntrys = new ArrayList(); public Http() { } public ArrayList getValidResults() { return validResultEntrys; } public void setValidResults(ResultContainer r) { if (r != null) { validResultEntrys.add(r); } } public String searchQueryBeautify(String str) { String back = ""; if (str != null) { back = str.replaceAll("\\s{1,}", "+"); } return back; } // public String searchQueryBeautify(String str){ public String get(String urlToGet) throws ClientProtocolException { HttpClient httpclient = new DefaultHttpClient(); String responseBody = null; String urlGetMe = null; try { URL url = new URL(urlToGet); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); // System.out.println("URI " + uri.toString() + " is OK"); urlGetMe = uri.toString(); } catch (MalformedURLException e) { System.out.println("URL " + urlToGet + " is a malformed URL"); } catch (URISyntaxException e) { System.out.println("URI " + urlToGet + " is a malformed URL"); } try { HttpGet httpget = new HttpGet(urlGetMe); // System.out.println("executing request " + httpget.getURI()); // Create a response handler ResponseHandler responseHandler = new BasicResponseHandler(); try { responseBody = httpclient.execute(httpget, responseHandler); } catch (IOException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception e) { } // System.out.println("----------------------------------------"); // System.out.println(responseBody); // System.out.println("----------------------------------------"); } finally { // When HttpClient instance is no longer needed, // shut down the connection manager to ensure // immediate deallocation of all system resources httpclient.getConnectionManager().shutdown(); } return responseBody; } // public String get(String url) throws ClientProtocolException { public ArrayList extractLinks(String url) throws IOException { final ArrayList result = new ArrayList(); try { Jsoup.connect(url).timeout(Constant.HTTP_TIMEOUT); Jsoup.connect(url).ignoreHttpErrors(false); Document doc = Jsoup.connect(url).get(); Elements links = doc.select("a[href]"); // Elements media = doc.select("[src]"); // Elements imports = doc.select("link[href]"); // href ... for (Element link : links) { result.add(link.attr("abs:href")); } // // img ... // for (Element src : media) { // result.add(src.attr("abs:src")); // } // // js, css, ... // for (Element link : imports) { // result.add(link.attr("abs:href")); // } } catch (Exception e) { } return result; }// public List extractLinks(String url) throws IOException { public void prepareParallelCheck(ArrayList urisToGet1, String filetyp, Http h) { // Create an HttpClient with the ThreadSafeClientConnManager. // This connection manager must be used if more than one thread will // be using the HttpClient. // ThreadSafeClientConnManager cm = new ThreadSafeClientConnManager(); // // cm.setMaxTotal(urisToGet1.size()); // cm.setMaxTotal(10000); IO io = new IO(); //String[] urisToGet = new String[urisToGet1.size()]; ArrayList urisToGet = new ArrayList(); // HttpClient httpclient = new DefaultHttpClient(cm); // SimpleDateFormat simple = new SimpleDateFormat("yyyy:MM:dd:HH:mm:ss:SS"); // Date d = new Date(); // String strDate = simple.format(d); // String fExt = ""; // try { // create an array of URIs to perform GETs on int count = 0; String decodedURI = ""; for (String uri : urisToGet1) { try { decodedURI = URLDecoder.decode(uri, "UTF-8"); } catch (UnsupportedEncodingException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } urisToGet.add(decodedURI); } // for ( String uri : urisToGet1){ // create a thread for each URI GetFileTypThread[] threads = new GetFileTypThread[urisToGet.size()]; //System.out.println("threads.length:"+threads.length); for (int i = 0; i < threads.length; i++) { try { // HttpGet httpget = new HttpGet(urisToGet.get(i)); // threads[i] = new GetThread(httpclient, httpget, i + 1, h); threads[i] = new GetFileTypThread(urisToGet.get(i), filetyp, h, i + 1); } catch (Exception e) { } } // start the threads for (int j = 0; j < threads.length; j++) { try { // System.out.println("THREADING:" + threads[j].uri); threads[j].start(); } catch (Exception e) { } } // join the threads for (int j = 0; j < threads.length; j++) { try { threads[j].join(); } catch (InterruptedException ex) { Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception e) { } } // } finally { // // When HttpClient instance is no longer needed, // // shut down the connection manager to ensure // // immediate deallocation of all system resources // httpclient.getConnectionManager().shutdown(); // } } // public void prepareParallelCheck(ArrayList urisToGet1 ){ /** * A thread that performs a GET. */ static class GetFileTypThread extends Thread { private final String uri; private final String filetyp; private final int id; private Http h; private long epoch = System.currentTimeMillis()/1000; IO i = new IO(); // aString strDate = simple.format(d); public GetFileTypThread(String uri, String filetyp, Http h, int id) { this.uri = uri; this.filetyp = filetyp; this.id = id; this.h = h; } /** * Executes the GetMethod and prints some status information. */ @Override public void run() { // System.out.println(id + " - about to get something from " ); String cl = h.getContentLength(uri); String fExt = i.getFileExtension(uri); if (filetyp.contains(fExt)) { // if (io.isValidFileTyp(decodedURI, filetyp)) { if (uri != null && cl != null && cl.length()>0 ) { ResultContainer r = new ResultContainer(); r.setLastChecked(epoch); r.setResultUrl(uri); r.setSizeOfUrlContent(cl); r.setFileTyp(fExt); h.setValidResults(r); } // if ( uri != null && cl != null ) { // System.out.println(id + " > Valid <" + uri); } // if (filetyp.contains(fExt)) { } // public void run() { } // static class GetFileTypThread extends Thread { /** * A thread that performs a GET. */ static class GetThread extends Thread { private final HttpClient httpClient; private final HttpContext context; private final HttpGet httpget; private final int id; private Http h; //Http h = new Http(); IO i = new IO(); public GetThread(HttpClient httpClient, HttpGet httpget, int id, Http h) { this.httpClient = httpClient; this.context = new BasicHttpContext(); this.httpget = httpget; this.id = id; this.h = h; } /** * Executes the GetMethod and prints some status information. */ @Override public void run() { // System.out.println(id + " - about to get something from " + httpget.getURI()); try { String url = httpget.getURI().toString(); CheckStatus status = new CheckStatus(); boolean isAlive = false; try { isAlive = status.isAlive(url); } catch (ClientProtocolException ex) { Logger.getLogger(GoogleResultParser.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(GoogleResultParser.class.getName()).log(Level.SEVERE, null, ex); } // SimpleDateFormat simple = new SimpleDateFormat("yyyy:MM:dd:HH:mm:ss:SS"); // Date d = new Date(); // String strDate = simple.format(d); //setLastChecked(strDate); String f = i.getFileExtension(url); long strDate = System.currentTimeMillis()/1000; if (isAlive) { ResultContainer r = new ResultContainer(); r.setLastChecked(strDate); r.setResultUrl(url); r.setSizeOfUrlContent(status.getContentLenght()); r.setFileTyp(f); System.out.println(url + " > am leben "); // setze den ResultContainer r in die ArrayList h ein h.setValidResults(r); } else { System.out.println(url + " > tot "); } // // execute the method // HttpResponse response = httpClient.execute(httpget, context); // // System.out.println(id + " - get executed"); // // get the response body as an array of bytes // HttpEntity entity = response.getEntity(); // if (entity != null) { // byte[] bytes = EntityUtils.toByteArray(entity); // System.out.println(id + " - " + bytes.length + " bytes read"); // } } catch (Exception e) { httpget.abort(); System.out.println(id + " - error: " + e); } } // public void run() { } // static class GetThread extends Thread { public String getContentLength(String urlToGet) { String urlGetMe = ""; String cl = ""; try { URL url = new URL(urlToGet); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); // System.out.println("URI " + uri.toString() + " is OK"); urlGetMe = uri.toString(); } catch (MalformedURLException e) { System.out.println("URL " + urlToGet + " is a malformed URL"); } catch (URISyntaxException e) { System.out.println("URI " + urlToGet + " is a malformed URL"); } HttpClient client = new DefaultHttpClient(); HttpGet method = new HttpGet(urlGetMe); HttpResponse httpResponse = null; try { httpResponse = client.execute(method); int statusCode = httpResponse.getStatusLine().getStatusCode(); if (statusCode == HttpStatus.SC_OK) { //isAlive = true; if (httpResponse != null) { Header h[] = httpResponse.getAllHeaders(); for (int i = 0; i < h.length; i++) { // System.out.println( h[i].getName() + " => " + h[i].getValue()) ; if (h[i].getName().contains("Content-Length")) { cl = h[i].getValue(); return cl; } // if (h[i].getName().contains("Content-Length")) { } // for (int i = 0; i < h.length; i++) { } // if ( httpResponse != null ) { } // if (statusCode == HttpStatus.SC_OK) { } catch (Exception e) { } // if (httpResponse != null) { // Header h[] = httpResponse.getAllHeaders(); // for (int i = 0; i < h.length; i++) { // // System.out.println( h[i].getName() + " => " + h[i].getValue()) ; // if (h[i].getName().contains("Content-Length")) { // cl = h[i].getValue(); // return cl; // } // if (h[i].getName().contains("Content-Length")) { // } // for (int i = 0; i < h.length; i++) { // } // if ( httpResponse != null ) { return cl; } // public String getContentLength( String urlToGet){ } // public class Http { // public boolean isAlive(String url) { // // if (url == null || !(url instanceof String)) { // Exception e = new Exception(); // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, e); // System.exit(1); // } // if ( str == null || !(str instanceof String)){ // // URL url1 = null; // try { // url1 = new URL(url); // } catch (MalformedURLException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // try { // url1 = new URL(url); // } catch (MalformedURLException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // HttpURLConnection code = null; // try { // code = (HttpURLConnection) url1.openConnection(); // } catch (IOException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // try { // System.out.println(code.getResponseCode()); // } catch (IOException ex) { // Logger.getLogger(Http.class.getName()).log(Level.SEVERE, null, ex); // } // return true; // } // public boolean isAlive(String url){